/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.atlas.json.io.parser;

import static org.apache.jena.atlas.lib.Chars.CH_COLON;
import static org.apache.jena.atlas.lib.Chars.CH_COMMA;
import static org.apache.jena.atlas.lib.Chars.CH_DOT;
import static org.apache.jena.atlas.lib.Chars.CH_GT;
import static org.apache.jena.atlas.lib.Chars.CH_HASH;
import static org.apache.jena.atlas.lib.Chars.CH_LBRACE;
import static org.apache.jena.atlas.lib.Chars.CH_LBRACKET;
import static org.apache.jena.atlas.lib.Chars.CH_LPAREN;
import static org.apache.jena.atlas.lib.Chars.CH_LT;
import static org.apache.jena.atlas.lib.Chars.CH_MINUS;
import static org.apache.jena.atlas.lib.Chars.CH_PLUS;
import static org.apache.jena.atlas.lib.Chars.CH_QUOTE1;
import static org.apache.jena.atlas.lib.Chars.CH_QUOTE2;
import static org.apache.jena.atlas.lib.Chars.CH_RBRACE;
import static org.apache.jena.atlas.lib.Chars.CH_RBRACKET;
import static org.apache.jena.atlas.lib.Chars.CH_RPAREN;
import static org.apache.jena.atlas.lib.Chars.CH_SEMICOLON;
import static org.apache.jena.atlas.lib.Chars.CR;
import static org.apache.jena.atlas.lib.Chars.EOF;
import static org.apache.jena.atlas.lib.Chars.NL;

import java.io.IOException;
import java.util.NoSuchElementException;

import org.apache.jena.atlas.io.IO;
import org.apache.jena.atlas.io.PeekReader;
import org.apache.jena.atlas.json.JsonParseException;
import org.apache.jena.riot.tokens.StringType;
import org.apache.jena.riot.tokens.Token;
import org.apache.jena.riot.tokens.TokenType;
import org.apache.jena.riot.tokens.Tokenizer;

/**
 * Tokenizer for all sorts of things JSON-ish.
 * <p>
 * Characters are pulled from a {@link PeekReader} and turned into {@link Token}s:
 * quoted strings (single, double and triple quoted), punctuation, numbers
 * (integer, decimal, double, hex) and bare keywords. Whitespace and
 * {@code #}-to-end-of-line comments between tokens are skipped.
 * <p>
 * The tokenizer is more liberal than strict JSON (for example it accepts
 * {@code ''} strings); rejecting non-JSON input is left to the caller.
 * All errors are reported by throwing {@link JsonParseException}.
 */
public class TokenizerJSON implements Tokenizer
{
    /** Token parsed by {@link #hasNext()} but not yet handed out; {@code null} if none is pending. */
    private Token token = null;
    /** Scratch buffer shared by the read* helpers; each helper resets it before use. */
    private final StringBuilder sb = new StringBuilder();
    private final PeekReader reader;
    /** Set once end of input has been observed, so later calls return immediately. */
    private boolean finished = false;

    /**
     * Create a tokenizer over the given reader.
     *
     * @param reader source of characters; closed by {@link #close()}
     */
    public TokenizerJSON(PeekReader reader) {
        this.reader = reader;
    }

    /**
     * Test whether another token is available, parsing it if necessary.
     * The parsed token is held until {@link #next()} takes it.
     *
     * @return {@code true} if a token is pending or could be parsed
     * @throws JsonParseException if the input at the current position is not a valid token
     */
    @Override
    public final boolean hasNext() {
        if ( finished )
            return false;
        if ( token != null )
            return true;
        skip();
        if ( reader.eof() ) {
            finished = true;
            return false;
        }
        token = parseToken();
        return token != null;
    }

    /**
     * Test whether the tokenizer is exhausted.
     *
     * @return {@code true} if there are no more tokens
     */
    @Override
    public final boolean eof() {
        // End of input is the opposite of "there is another token".
        return !hasNext();
    }

    /**
     * Move to next token.
     *
     * @return the next token
     * @throws NoSuchElementException if there are no more tokens
     */
    @Override
    public final Token next() {
        if ( !hasNext() )
            throw new NoSuchElementException();
        Token t = token;
        token = null;
        return t;
    }

    /**
     * Look at the next token without consuming it.
     *
     * @return the next token, or {@code null} if there are no more tokens
     */
    @Override
    public final Token peek() {
        if ( !hasNext() )
            return null;
        return token;
    }

    // ---- Machinery

    // ""-string, ''-string, """-string, '''-string
    // various single characters . , : ;
    // (), [], {}, <>
    // Numbers (integer, decimal, double, hex)
    // Keywords: words of letters, digits, '_', '.', '-' not starting with a digit.

    /**
     * Parse one token starting at the current reader position.
     * The caller has already skipped whitespace and checked for end of input.
     * <p>
     * The token is built in a local variable so that a parse failure does not
     * leave a partially initialised token in the {@link #token} field.
     */
    private Token parseToken() {
        Token t = new Token(getLine(), getColumn());
        int ch = reader.peekChar();

        // ---- String
        // Support both "" and '' strings (only "" is legal JSON)
        if ( ch == CH_QUOTE1 || ch == CH_QUOTE2 )
            return parseString(t, ch);

        // ---- Single character tokens.
        // DOT can't start a decimal in JSON so it is always a token of its own.
        TokenType punctuation = punctuationType(ch);
        if ( punctuation != null ) {
            // Consume the character; without this the same token would be produced forever.
            reader.readChar();
            t.setType(punctuation);
            return t;
        }

        // ---- Numbers
        if ( ch == CH_PLUS || ch == CH_MINUS || range(ch, '0', '9') ) {
            readNumber(t);
            return t;
        }

        // ---- Plain words.
        // Can't start with a digit due to the numeric test above.
        readKeyWord(t);
        return t;
    }

    /**
     * Parse a quoted string into {@code t}. The reader is positioned on the
     * opening quote character {@code quote}, which has only been peeked.
     */
    private Token parseString(Token t, int quote) {
        t.setType(TokenType.STRING);
        reader.readChar(); // Opening quote.
        boolean isQuote1 = (quote == CH_QUOTE1);

        if ( reader.peekChar() == quote ) {
            // Maybe """-strings/'''-strings
            reader.readChar(); // Read potential second quote.
            if ( reader.peekChar() == quote ) {
                // """-strings/'''-strings
                reader.readChar();
                t.setImage(readLong(quote, false));
                t.setStringType(isQuote1 ? StringType.LONG_STRING1 : StringType.LONG_STRING2);
                return t;
            }
            // Two quotes then a non-quote: must be '' or ""
            t.setImage("");
        } else {
            // Single quote character: read up to the matching close quote.
            t.setImage(allBetween(quote, quote, true, false));
        }
        t.setStringType(isQuote1 ? StringType.STRING1 : StringType.STRING2);
        return t;
    }

    /**
     * Map a single punctuation character to its token type.
     *
     * @return the token type, or {@code null} if {@code ch} is not a single-character token
     */
    private static TokenType punctuationType(int ch) {
        switch (ch) {
            case CH_DOT :
                return TokenType.DOT;
            case CH_SEMICOLON :
                return TokenType.SEMICOLON;
            case CH_COMMA :
                return TokenType.COMMA;
            case CH_LBRACE :
                return TokenType.LBRACE;
            case CH_RBRACE :
                return TokenType.RBRACE;
            case CH_LPAREN :
                return TokenType.LPAREN;
            case CH_RPAREN :
                return TokenType.RPAREN;
            case CH_LBRACKET :
                return TokenType.LBRACKET;
            case CH_RBRACKET :
                return TokenType.RBRACKET;
            case CH_COLON :
                return TokenType.COLON;
            case CH_LT :
                return TokenType.LT;
            case CH_GT :
                return TokenType.GT;
            default :
                return null;
        }
    }

    /** Skip whitespace and '#' comments (which run to the end of the line). */
    private void skip() {
        for (;; ) {
            if ( reader.eof() )
                return;

            int ch = reader.peekChar();
            if ( ch == CH_HASH ) {
                reader.readChar();
                // Comment. Skip to NL
                for (;; ) {
                    ch = reader.peekChar();
                    if ( ch == EOF || isNewlineChar(ch) )
                        break;
                    reader.readChar();
                }
            }

            // Including excess newline chars from comment.
            if ( !isWhitespace(ch) )
                break;
            reader.readChar();
        }
    }

    /**
     * Read a keyword into {@code token2}.
     *
     * @throws JsonParseException if no characters could be read as a word
     */
    private void readKeyWord(Token token2) {
        long posn = reader.getPosition();
        token2.setImage(readWord(false));
        token2.setType(TokenType.KEYWORD);

        // If we made no progress, nothing found, not even a keyword -- it's an
        // error.
        if ( posn == reader.getPosition() ) {
            int ch = reader.peekChar();
            exception(String.format("Unknown char: %c(%d)", ch, ch));
        }
    }

    /**
     * Read the body of a triple-quoted string; the three opening quotes have
     * already been consumed. Escape sequences are processed.
     *
     * @param quoteChar the quote character in use
     * @param endNL     if {@code true}, end of input terminates the string instead of being an error
     * @return the string contents, without the quotes
     */
    private String readLong(int quoteChar, boolean endNL) {
        sb.setLength(0);
        for (;; ) {
            int ch = reader.readChar();
            if ( ch == EOF ) {
                if ( endNL )
                    return sb.toString();
                exception("Broken long string");
            }

            if ( ch == quoteChar ) {
                if ( threeQuotes(quoteChar) )
                    return sb.toString();
            }

            if ( ch == '\\' )
                ch = readLiteralEscape();
            insertLiteralChar(sb, ch);
        }
    }

    // Assume have read the first quote char.
    // On return:
    //   If false, have moved over no more characters (due to pushbacks)
    //   If true, at end of 3 quotes
    private boolean threeQuotes(int ch) {
        int ch2 = reader.peekChar();
        if ( ch2 != ch )
            return false;

        reader.readChar(); // Read second quote.
        int ch3 = reader.peekChar();
        if ( ch3 != ch ) {
            // Only two quotes: restore the second one so the caller sees it again.
            reader.pushbackChar(ch2);
            return false;
        }

        // Three quotes.
        reader.readChar(); // Read third quote.
        return true;
    }

    /**
     * Read a "word": alphanumerics, "_", ".", "-".
     *
     * @param leadingDigitAllowed if {@code false} and the next character is a digit, nothing is read
     * @return the word read; empty if no word characters were found
     */
    private String readWord(boolean leadingDigitAllowed) {
        sb.setLength(0);
        if ( !leadingDigitAllowed && Character.isDigit(reader.peekChar()) )
            return "";

        for (;; ) {
            int ch = reader.peekChar();
            boolean wordChar = Character.isLetterOrDigit(ch) || ch == '_' || ch == '.' || ch == '-';
            if ( !wordChar )
                break;
            reader.readChar();
            sb.append((char)ch);
        }
        return sb.toString();
    }

    /*
     * Number forms accepted (a '.' cannot start a number: it is tokenized as DOT,
     * but it may follow a sign):
     *
     *   integer  ::= ('-' | '+')? [0-9]+
     *   decimal  ::= ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' [0-9]+ )
     *   double   ::= (integer | decimal) exponent
     *   exponent ::= [eE] ('-' | '+')? [0-9]+
     *   hex      ::= '0' [xX] [0-9a-fA-F]+
     */
    /**
     * Read a number into {@code t}, setting its image and one of the types
     * HEX, INTEGER, DECIMAL or DOUBLE. The reader is positioned on a sign or digit.
     *
     * @throws JsonParseException if there are no digits, or an exponent or hex prefix has no digits
     */
    private void readNumber(Token t) {
        boolean isDecimal = false;
        sb.setLength(0);

        int intDigits = 0;  // Digits before a dot.
        int fracDigits = 0; // Digits after a dot.
        int ch = reader.peekChar();
        if ( ch == '0' ) {
            intDigits++;
            reader.readChar();
            sb.append((char)ch);
            ch = reader.peekChar();
            if ( ch == 'x' || ch == 'X' ) {
                reader.readChar();
                sb.append((char)ch);
                readHex(reader, sb);
                t.setImage(sb.toString());
                t.setType(TokenType.HEX);
                return;
            }
        } else if ( ch == '-' || ch == '+' ) {
            readPossibleSign(sb);
        }

        intDigits += readDigits(sb);
        if ( reader.peekChar() == CH_DOT ) {
            reader.readChar();
            sb.append(CH_DOT);
            isDecimal = true; // Includes things that will be doubles.
            fracDigits = readDigits(sb);
        }

        // A number needs at least one digit: a bare sign, or a sign and a dot, is not one.
        if ( intDigits == 0 && fracDigits == 0 )
            exception("Unrecognized as number");

        boolean isDouble = exponent(sb);

        t.setImage(sb.toString());
        if ( isDouble )
            t.setType(TokenType.DOUBLE);
        else if ( isDecimal )
            t.setType(TokenType.DECIMAL);
        else
            t.setType(TokenType.INTEGER);
    }

    /**
     * Append hex digits from {@code reader} to {@code sb}, which already holds the "0x" prefix.
     *
     * @throws JsonParseException if no hex digit follows the prefix
     */
    private static void readHex(PeekReader reader, StringBuilder sb) {
        int count = 0;
        for (;; ) {
            int ch = reader.peekChar();
            if ( !range(ch, '0', '9') && !range(ch, 'a', 'f') && !range(ch, 'A', 'F') )
                break;
            reader.readChar();
            sb.append((char)ch);
            count++;
        }
        if ( count == 0 )
            exception(reader, "No hex characters after " + sb.toString());
    }

    /**
     * Read an optional exponent part ({@code e}/{@code E}, optional sign, digits) into {@code sb}.
     *
     * @return {@code true} if an exponent was read, {@code false} if the next character does not start one
     * @throws JsonParseException if the exponent marker is not followed by at least one digit
     */
    private boolean exponent(StringBuilder sb) {
        int ch = reader.peekChar();
        if ( ch != 'e' && ch != 'E' )
            return false;
        reader.readChar();
        sb.append((char)ch);
        readPossibleSign(sb);
        int x = readDigits(sb);
        if ( x == 0 )
            exception("Malformed double: " + sb);
        return true;
    }

    /** Read a '+' or '-' into {@code sb} if one is next; otherwise read nothing. */
    private void readPossibleSign(StringBuilder sb) {
        int ch = reader.peekChar();
        if ( ch == '-' || ch == '+' ) {
            reader.readChar();
            sb.append((char)ch);
        }
    }

    /**
     * Read a run of ASCII digits into {@code buffer}.
     *
     * @return the number of digits read (possibly zero)
     */
    private int readDigits(StringBuilder buffer) {
        int count = 0;
        for (;; ) {
            int ch = reader.peekChar();
            if ( !range(ch, '0', '9') )
                break;
            reader.readChar();
            buffer.append((char)ch);
            count++;
        }
        return count;
    }

    /**
     * Read a language tag: letters, then any number of '-' separated alphanumeric parts.
     *
     * @throws JsonParseException if the first part or any part after a '-' is empty
     */
    private String langTag() {
        sb.setLength(0);
        a2z(sb);
        if ( sb.length() == 0 )
            exception("Bad language tag");
        for (;; ) {
            int ch = reader.peekChar();
            if ( ch != '-' )
                break;
            reader.readChar();
            sb.append('-');
            int x = sb.length();
            a2zN(sb);
            if ( sb.length() == x )
                exception("Bad language tag");
        }
        return sb.toString();
    }

    /** Read a run of ASCII letters into {@code sb2}. */
    private void a2z(StringBuilder sb2) {
        for (;; ) {
            int ch = reader.peekChar();
            if ( !isA2Z(ch) )
                return;
            reader.readChar();
            sb2.append((char)ch);
        }
    }

    /** Read a run of ASCII letters and digits into {@code sb2}. */
    private void a2zN(StringBuilder sb2) {
        for (;; ) {
            int ch = reader.peekChar();
            if ( !isA2ZN(ch) )
                return;
            reader.readChar();
            sb2.append((char)ch);
        }
    }

    /**
     * Read a blank node label: A-Z, a-z, 0-9 and '-'.
     * The character that ends the label is left unread.
     *
     * @throws JsonParseException if no label characters are present
     */
    private String blankNodeLabel() {
        sb.setLength(0);
        for (;; ) {
            // Peek first so the terminating character is not consumed.
            int ch = reader.peekChar();
            if ( ch == EOF )
                break;
            if ( !isA2ZN(ch) && ch != '-' )
                break;
            reader.readChar();
            sb.append((char)ch);
        }
        if ( sb.length() == 0 )
            exception("Blank node label missing");
        return sb.toString();
    }

    /**
     * Get the characters up to an end marker. The start marker has already been read.
     *
     * @param startCh    the start marker (not used; it has already been consumed)
     * @param endCh      the end marker; it is consumed but not included in the result
     * @param strEscapes if {@code true}, all string escapes are processed;
     *                   otherwise only {@code \}{@code u} and {@code \}{@code U} escapes are allowed
     * @param endNL      if {@code true}, end of input is accepted as an ending
     * @return the characters between the markers
     * @throws JsonParseException on a raw newline, an illegal escape, or (unless {@code endNL}) end of input
     */
    private String allBetween(int startCh, int endCh, boolean strEscapes, boolean endNL) {
        // Report errors at the position where the token started.
        long y = getLine();
        long x = getColumn();
        sb.setLength(0);

        for (;; ) {
            int ch = reader.readChar();
            if ( ch == EOF ) {
                if ( endNL )
                    return sb.toString();
                exception("Broken token: " + sb.toString(), y, x);
            }

            if ( ch == '\n' )
                exception("Broken token (newline): " + sb.toString(), y, x);

            if ( ch == endCh )
                return sb.toString();

            if ( ch == '\\' ) {
                if ( strEscapes )
                    ch = readLiteralEscape();
                else {
                    ch = reader.readChar();
                    if ( ch == EOF ) {
                        if ( endNL )
                            return sb.toString();
                        exception("Broken token: " + sb.toString(), y, x);
                    }

                    switch (ch) {
                        case 'u' :
                            ch = readUnicode4Escape();
                            break;
                        case 'U' :
                            // \U is followed by 8 hex digits, as in readLiteralEscape.
                            ch = readUnicode8Escape();
                            break;
                        default :
                            exception(String.format("illegal escape sequence value: %c (0x%02X)", ch, ch));
                            break;
                    }
                }
            }
            insertLiteralChar(sb, ch);
        }
    }

    /**
     * Append the code point {@code ch} to {@code buffer}, as a surrogate pair if needed.
     *
     * @throws JsonParseException if {@code ch} is above the BMP and not a legal supplementary code point
     */
    private void insertLiteralChar(StringBuilder buffer, int ch) {
        if ( Character.charCount(ch) == 1 )
            buffer.append((char)ch);
        else {
            // Convert to UTF-16. Note that the rest of any system this is used
            // in must also respect codepoints and surrogate pairs.
            if ( !Character.isDefined(ch) && !Character.isSupplementaryCodePoint(ch) )
                exception(String.format("Illegal codepoint: 0x%04X", ch));
            char[] chars = Character.toChars(ch);
            buffer.append(chars);
        }
    }

    /** @return the reader's current column number */
    @Override
    public long getColumn() {
        return reader.getColNum();
    }

    /** @return the reader's current line number */
    @Override
    public long getLine() {
        return reader.getLineNum();
    }

    /** Close the underlying reader; an {@link IOException} is passed to {@link IO#exception}. */
    @Override
    public void close() {
        try {
            reader.close();
        }
        catch (IOException ex) {
            IO.exception(ex);
        }
    }

    // ---- Character classes

    private boolean isA2Z(int ch) {
        return range(ch, 'a', 'z') || range(ch, 'A', 'Z');
    }

    private boolean isA2ZN(int ch) {
        return range(ch, 'a', 'z') || range(ch, 'A', 'Z') || range(ch, '0', '9');
    }

    private boolean isNumeric(int ch) {
        return range(ch, '0', '9');
    }

    private static boolean isWhitespace(int ch) {
        return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f';
    }

    private static boolean isNewlineChar(int ch) {
        return ch == '\r' || ch == '\n';
    }

    // ---- Escape sequences

    /**
     * Read the remainder of an escape sequence; the backslash has already been consumed.
     *
     * @return the character or code point the escape stands for
     * @throws JsonParseException on end of input or an unknown escape character
     */
    private int readLiteralEscape() {
        int c = reader.readChar();
        if ( c == EOF )
            exception("Escape sequence not completed");

        switch (c) {
            case 'n' :
                return NL;
            case 'r' :
                return CR;
            case 't' :
                return '\t';
            case 'f' :
                return '\f'; // Form feed is a legal JSON escape.
            case 'b' :
                return '\b';
            case '"' :
                return '"';
            case '/' :
                return '/'; // JSON requires / escapes.
            case '\'' :
                return '\'';
            case '\\' :
                return '\\';
            case 'u' :
                return readUnicode4Escape();
            case 'U' :
                return readUnicode8Escape();
            default :
                exception(String.format("illegal escape sequence value: %c (0x%02X)", c, c));
                return 0;
        }
    }

    /**
     * Read a unicode escape: 'u' and 4 hex digits, or 'U' and 8 hex digits.
     * The backslash has already been consumed.
     */
    private int readUnicodeEscape() {
        int ch = reader.readChar();
        if ( ch == EOF )
            exception("Broken escape sequence");

        switch (ch) {
            case 'u' :
                return readUnicode4Escape();
            case 'U' :
                return readUnicode8Escape();
            default :
                exception(String.format("illegal escape sequence value: %c (0x%02X)", ch, ch));
        }
        return 0;
    }

    /** Read 4 hex digits as a code point. */
    private int readUnicode4Escape() {
        return readUnicodeEscape(4);
    }

    /**
     * Read 8 hex digits as a code point.
     *
     * @throws JsonParseException if the value is not a valid Unicode code point
     */
    private int readUnicode8Escape() {
        int ch8 = readUnicodeEscape(8);
        // Eight hex digits can overflow to a negative int, so test the full valid range
        // rather than only the upper bound.
        if ( !Character.isValidCodePoint(ch8) )
            exception(String.format("illegal code point in \\U sequence value: 0x%08X", ch8));
        return ch8;
    }

    /** Read exactly {@code N} hex digits and return their value. */
    private int readUnicodeEscape(int N) {
        int x = 0;
        for ( int i = 0; i < N; i++ ) {
            int d = readHexChar();
            if ( d < 0 )
                return -1;
            x = (x << 4) + d;
        }
        return x;
    }

    /**
     * Read one character and return its value as a hex digit.
     *
     * @throws JsonParseException on end of input or a non-hex character
     */
    private int readHexChar() {
        int ch = reader.readChar();
        if ( ch == EOF )
            exception("Not a hexadecimal character (end of file)");

        if ( range(ch, '0', '9') )
            return ch - '0';
        if ( range(ch, 'a', 'f') )
            return ch - 'a' + 10;
        if ( range(ch, 'A', 'F') )
            return ch - 'A' + 10;

        exception("Not a hexadecimal character: " + (char)ch);
        return -1;
    }

    /** Test {@code a <= ch <= b}. */
    private static boolean range(int ch, char a, char b) {
        return (ch >= a && ch <= b);
    }

    /**
     * Consume the characters of {@code str} from the reader.
     *
     * @return {@code true} if all of {@code str} was read
     * @throws JsonParseException on end of input or the first mismatching character
     */
    private boolean expect(String str) {
        for ( int i = 0; i < str.length(); i++ ) {
            char want = str.charAt(i);
            if ( reader.eof() ) {
                exception("End of input during expected string: " + str);
                return false;
            }
            int inChar = reader.readChar();
            if ( inChar != want ) {
                exception("expected \"" + str + "\"");
                return false;
            }
        }
        return true;
    }

    // ---- Error reporting. All of these always throw.

    /** Throw a {@link JsonParseException} located at the reader's current position. */
    private void exception(String message) {
        exception(message, reader.getLineNum(), reader.getColNum());
    }

    /** Throw a {@link JsonParseException} located at the current position of {@code reader}. */
    private static void exception(PeekReader reader, String message) {
        exception(message, reader.getLineNum(), reader.getColNum());
    }

    /** Throw a {@link JsonParseException} for the given line and column. */
    private static void exception(String message, long line, long col) {
        throw new JsonParseException(message, (int)line, (int)col);
    }
}
